import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
import math
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
import plotly.offline as py
import plotly.graph_objs as go
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.stattools import acf, pacf
from statsmodels.tsa.arima_model import ARIMA
import itertools
import warnings
warnings.filterwarnings("ignore")
# Load the Bitcoin training data, indexing rows by the "Date" column.
# NOTE(review): dates load as strings here and are converted/sorted further
# down; "Volume" and "Market Cap" contain comma-separated numbers and "-"
# placeholders, so they come in as object dtype, not numeric.
Data = pd.read_csv("bitcoin_price_Training - Training.csv", index_col = "Date")
Data.head()
| Open | High | Low | Close | Volume | Market Cap | |
|---|---|---|---|---|---|---|
| Date | ||||||
| Jul 31, 2017 | 2763.24 | 2889.62 | 2720.61 | 2875.34 | 860,575,000 | 45,535,800,000 |
| Jul 30, 2017 | 2724.39 | 2758.53 | 2644.85 | 2757.18 | 705,943,000 | 44,890,700,000 |
| Jul 29, 2017 | 2807.02 | 2808.76 | 2692.80 | 2726.45 | 803,746,000 | 46,246,700,000 |
| Jul 28, 2017 | 2679.73 | 2897.45 | 2679.73 | 2809.01 | 1,380,100,000 | 44,144,400,000 |
| Jul 27, 2017 | 2538.71 | 2693.32 | 2529.34 | 2671.78 | 789,104,000 | 41,816,500,000 |
Data.shape
(1556, 6)
Data.info()
<class 'pandas.core.frame.DataFrame'> Index: 1556 entries, Jul 31, 2017 to Apr 28, 2013 Data columns (total 6 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Open 1556 non-null float64 1 High 1556 non-null float64 2 Low 1556 non-null float64 3 Close 1556 non-null float64 4 Volume 1556 non-null object 5 Market Cap 1556 non-null object dtypes: float64(4), object(2) memory usage: 85.1+ KB
Data.describe()
| Open | High | Low | Close | |
|---|---|---|---|---|
| count | 1556.000000 | 1556.000000 | 1556.000000 | 1556.000000 |
| mean | 582.625328 | 597.992847 | 567.851446 | 584.239396 |
| std | 523.137312 | 542.992855 | 505.877401 | 525.904442 |
| min | 68.500000 | 74.560000 | 65.530000 | 68.430000 |
| 25% | 254.287500 | 260.327500 | 248.835000 | 254.320000 |
| 50% | 438.600000 | 447.560000 | 430.570000 | 438.855000 |
| 75% | 662.437500 | 674.525000 | 646.735000 | 663.402500 |
| max | 2953.220000 | 2999.910000 | 2840.530000 | 2958.110000 |
Data.isnull().sum()
Open 0 High 0 Low 0 Close 0 Volume 0 Market Cap 0 dtype: int64
# Histogram of each numeric column to eyeball the distributions.
Data.hist(figsize = (15,8))
plt.show()
# The CSV stores dates as strings in reverse-chronological order; convert
# to a DatetimeIndex and sort ascending for time-series operations.
Data.index = pd.to_datetime(Data.index)
Data = Data.sort_index()
Data.head()
| Open | High | Low | Close | Volume | Market Cap | |
|---|---|---|---|---|---|---|
| Date | ||||||
| 2013-04-28 | 135.30 | 135.98 | 132.10 | 134.21 | - | 1,500,520,000 |
| 2013-04-29 | 134.44 | 147.49 | 134.00 | 144.54 | - | 1,491,160,000 |
| 2013-04-30 | 144.00 | 146.93 | 134.05 | 139.00 | - | 1,597,780,000 |
| 2013-05-01 | 139.00 | 139.89 | 107.72 | 116.99 | - | 1,542,820,000 |
| 2013-05-02 | 116.38 | 125.60 | 92.28 | 105.21 | - | 1,292,190,000 |
# Daily closing price over the full 2013-2017 training period.
sns.set_style("whitegrid")
plt.figure(figsize = (25,10))
Data['Close'].plot()
plt.title('Daily Bitcoin Price Distribution Plot', fontsize = 18)
plt.xlabel('Year', fontsize = 16)
plt.ylabel('Daily Bitcoin Price', fontsize = 16)
Text(0, 0.5, 'Daily Bitcoin Price')
# Weekly closing price: average the daily closes within each week.
# (The original used .sum(), which adds ~7 daily prices together and
# inflates the plotted "price" roughly seven-fold — a mean is the
# meaningful weekly price level implied by the title and y-label.)
weekly = Data['Close'].resample('W').mean()
sns.set_style("whitegrid")
plt.figure(figsize = (22,8))
weekly.plot()
plt.title('Weekly Bitcoin Price Distribution Plot', fontsize = 18)
plt.xlabel('Year', fontsize = 16)
plt.ylabel('Weekly Bitcoin Price', fontsize = 16)
Text(0, 0.5, 'Weekly Bitcoin Price')
# Monthly mean closing price, resampled to month-end so the x-axis is the
# actual timeline. (The original grouped by Data.index.month, pooling e.g.
# every January of 2013-2017 into a single point — 12 buckets that
# contradict the 'Year' x-label; this matches the weekly plot above and
# the monthly_mean computation later in the script.)
Month = Data['Close'].resample('M').mean()
sns.set_style("whitegrid")
plt.figure(figsize = (22,8))
Month.plot()
plt.title('Monthly Bitcoin Price Distribution Plot', fontsize = 18)
plt.xlabel('Year', fontsize = 16)
plt.ylabel('Monthly Bitcoin Price', fontsize = 16)
Text(0, 0.5, 'Monthly Bitcoin Price')
# Mean closing price per quarter-of-year (1-4). The original assigned the
# raw GroupBy object (no aggregation) and then called .plot() on it, which
# draws one line per group instead of the aggregated series; add .mean()
# as the monthly groupby above does.
Quarter = Data['Close'].groupby(Data['Close'].index.quarter).mean()
plt.figure(figsize = (22,8))
Quarter.plot()
plt.title('Quarterly Bitcoin Price Distribution Plot', fontsize = 18)
plt.xlabel('Year', fontsize = 16)
plt.ylabel('Quarterly Bitcoin Price', fontsize = 16)
Text(0, 0.5, 'Quarterly Bitcoin Price')
# Label each day 'Weekday' (Mon-Fri, weekday < 5) or 'Weekend' (Sat/Sun),
# then compare the mean closing price per year for the two groups
# side by side.
Weekend = np.where(Data['Close'].index.weekday < 5, 'Weekday', 'Weekend')
Time = Data['Close'].groupby([Weekend, Data['Close'].index.year]).mean()
fig, ax = plt.subplots(1, 2, figsize = (20, 5))
Time.loc['Weekday'].plot(ax = ax[0], title = 'Weekdays')
Time.loc['Weekend'].plot(ax = ax[1], title = 'Weekends')
<AxesSubplot:title={'center':'Weekends'}, xlabel='Date'>
# Work with the closing-price series from here on.
ts = Data['Close']
plt.figure(figsize = (25,10))
plt.plot(ts)
[<matplotlib.lines.Line2D at 0x228656001c8>]
from statsmodels.tsa.stattools import adfuller
def test_for_stationary(input_data):
    """Visual and statistical stationarity check for a time series.

    Plots the series together with its 7-day rolling mean and rolling
    standard deviation, then runs the Augmented Dickey-Fuller test and
    prints the test statistic, p-value, lag count, observation count and
    critical values.
    """
    window = 7
    mean_7d = input_data.rolling(window = window, center = False).mean()
    std_7d = input_data.rolling(window = window, center = False).std()

    # Overlay the raw series with its rolling statistics; a roughly flat
    # rolling mean/std suggests stationarity.
    plt.figure(figsize = (25,10))
    plt.plot(input_data, color = 'blue', label = 'given_series')
    plt.plot(mean_7d, color = 'red', label = 'rolling_mean')
    plt.plot(std_7d, color ='green', label = 'rolling_std')
    plt.legend(loc = 'best')
    plt.title('Rolling Mean & Standard Deviation')
    plt.show(block = False)

    #Perform Dickey-Fuller test:
    print('Results of Dickey-Fuller Test:')
    dftest = adfuller(input_data)
    labels = ['Test Statistic','p-value','#Lags Used','Number of Observations Used']
    dfoutput = pd.Series(dftest[0:4], index=labels)
    for key,value in dftest[4].items():
        dfoutput['Critical Value (%s)'%key] = value
    print(dfoutput)
test_for_stationary(ts)
Results of Dickey-Fuller Test: Test Statistic 2.535589 p-value 0.999060 #Lags Used 24.000000 Number of Observations Used 1531.000000 Critical Value (1%) -3.434628 Critical Value (5%) -2.863430 Critical Value (10%) -2.567776 dtype: float64
# Log-transform the price series to damp the exponential growth and
# stabilise the variance before modelling.
ts_logtransformed = np.log(ts)
plt.figure(figsize = (25,10))
plt.plot(ts_logtransformed)
[<matplotlib.lines.Line2D at 0x228653b0308>]
ts_logtransformed.head(10)
Date 2013-04-28 4.899406 2013-04-29 4.973556 2013-04-30 4.934474 2013-05-01 4.762088 2013-05-02 4.655958 2013-05-03 4.582413 2013-05-04 4.722953 2013-05-05 4.752814 2013-05-06 4.721174 2013-05-07 4.714025 Name: Close, dtype: float64
# 7-day rolling mean of the log series (the first 6 values are NaN).
Rolling_average = ts_logtransformed.rolling(window = 7, center = False).mean()
plt.figure(figsize = (25,10))
plt.plot(ts_logtransformed, label = 'Log Transformed')
plt.plot(Rolling_average, color = 'red', label = 'Rolling Average')
plt.legend(loc = 'best')
<matplotlib.legend.Legend at 0x22864ca6c48>
Rolling_average.head(10)
Date 2013-04-28 NaN 2013-04-29 NaN 2013-04-30 NaN 2013-05-01 NaN 2013-05-02 NaN 2013-05-03 NaN 2013-05-04 4.790121 2013-05-05 4.769180 2013-05-06 4.733125 2013-05-07 4.701632 Name: Close, dtype: float64
# De-trend by subtracting the rolling mean from the log series.
log_Rolling_difference = ts_logtransformed - Rolling_average
log_Rolling_difference.head(10)
log_Rolling_difference.tail(10)
Date 2017-07-22 0.151593 2017-07-23 0.073239 2017-07-24 0.051861 2017-07-25 -0.030130 2017-07-26 -0.063797 2017-07-27 -0.001463 2017-07-28 0.041254 2017-07-29 0.015741 2017-07-30 0.025554 2017-07-31 0.061402 Name: Close, dtype: float64
# Drop the leading NaNs introduced by the 7-day window before plotting.
log_Rolling_difference.dropna(inplace = True)
plt.figure(figsize = (25,10))
plt.plot(log_Rolling_difference)
plt.title('Log Rolling Difference Distribution Plot', fontsize = 25)
plt.xlabel('Year', fontsize = 16)
Text(0.5, 0, 'Year')
test_for_stationary(log_Rolling_difference)
Results of Dickey-Fuller Test: Test Statistic -7.191326e+00 p-value 2.499260e-10 #Lags Used 2.200000e+01 Number of Observations Used 1.527000e+03 Critical Value (1%) -3.434640e+00 Critical Value (5%) -2.863435e+00 Critical Value (10%) -2.567779e+00 dtype: float64
# Exponentially weighted moving average with a 7-day half-life — weights
# recent observations more heavily and has no leading NaNs.
Expwighted_avg = ts_logtransformed.ewm(halflife = 7, min_periods = 0, adjust = True, ignore_na = False).mean()
plt.figure(figsize = (25,10))
plt.plot(ts_logtransformed, label = 'Log transfomed')
plt.plot(Expwighted_avg, color = 'red', label = 'exponential weighted average')
plt.title('Log transfomed & Exponential Weighted Moving Average Plot', fontsize = 25)
plt.legend(loc = 'best')
<matplotlib.legend.Legend at 0x228641f0248>
Expwighted_avg.head(10)
Date 2013-04-28 4.899406 2013-04-29 4.938315 2013-04-30 4.936906 2013-05-01 4.886513 2013-05-02 4.830850 2013-05-03 4.778564 2013-05-04 4.768079 2013-05-05 4.765448 2013-05-06 4.758372 2013-05-07 4.751720 Name: Close, dtype: float64
# De-trend using the EWMA and re-run the stationarity check.
log_expmovwt_diff = ts_logtransformed - Expwighted_avg
test_for_stationary(log_expmovwt_diff)
Results of Dickey-Fuller Test: Test Statistic -5.442038 p-value 0.000003 #Lags Used 23.000000 Number of Observations Used 1532.000000 Critical Value (1%) -3.434626 Critical Value (5%) -2.863428 Critical Value (10%) -2.567775 dtype: float64
# Re-plot the log series at full width before trying seasonal differencing.
plt.figure(figsize = (25,10))
ts_logtransformed.plot()
<AxesSubplot:xlabel='Date'>
We can subtract the previous month's data from the present (a gap of 30 days), in which case the first month's data would not be available for modeling.
Next we try differencing at different seasonal lags and test whether the differenced data is stationary.
# Difference the log series at a 7-day lag (weekly seasonality);
# the first 7 values become NaN.
ts_diff_logtrans = ts_logtransformed - ts_logtransformed.shift(7)
plt.figure(figsize = (25,10))
plt.plot(ts_diff_logtrans)
ts_diff_logtrans.head(10)
Date 2013-04-28 NaN 2013-04-29 NaN 2013-04-30 NaN 2013-05-01 NaN 2013-05-02 NaN 2013-05-03 NaN 2013-05-04 NaN 2013-05-05 -0.146592 2013-05-06 -0.252382 2013-05-07 -0.220449 Name: Close, dtype: float64
# Drop the NaNs produced by the shift, then test the differenced series.
plt.figure(figsize = (25,10))
ts_diff_logtrans.dropna(inplace = True)
test_for_stationary(ts_diff_logtrans)
<Figure size 1800x720 with 0 Axes>
Results of Dickey-Fuller Test: Test Statistic -6.523574e+00 p-value 1.027774e-08 #Lags Used 2.300000e+01 Number of Observations Used 1.525000e+03 Critical Value (1%) -3.434645e+00 Critical Value (5%) -2.863437e+00 Critical Value (10%) -2.567780e+00 dtype: float64
# Decompose the log series into trend + seasonal + residual components
# and plot all four panels (original plus the three components) stacked.
Decomposition = seasonal_decompose(ts_logtransformed)
Trend = Decomposition.trend
Seasonal = Decomposition.seasonal
Residual = Decomposition.resid
plt.figure(figsize = (25,15))
plt.subplot(411)
plt.plot(ts_logtransformed, label = 'Original')
plt.title('Original', fontsize = 25)
plt.legend(loc = 'best')
plt.subplot(412)
plt.plot(Trend, label = 'Trend')
plt.title('Trend', fontsize = 25)
plt.legend(loc = 'best')
plt.subplot(413)
plt.plot(Seasonal, label = 'Seasonality')
plt.title('Seasonality', fontsize = 25)
plt.legend(loc = 'best')
plt.subplot(414)
plt.plot(Residual, label = 'Residuals')
plt.title('Residuals', fontsize = 25)
plt.legend(loc = 'best')
plt.tight_layout()
# The residual is what remains after removing trend and seasonality;
# drop the NaN edges from decomposition and test it for stationarity.
Decomposed_TS = Residual
Decomposed_TS.dropna(inplace = True)
test_for_stationary(Decomposed_TS)
Results of Dickey-Fuller Test: Test Statistic -1.256779e+01 p-value 2.035387e-23 #Lags Used 2.300000e+01 Number of Observations Used 1.526000e+03 Critical Value (1%) -3.434642e+00 Critical Value (5%) -2.863436e+00 Critical Value (10%) -2.567779e+00 dtype: float64
# ACF of the weekly-differenced series; the dashed horizontal lines are
# the approximate 95% confidence bounds (+/- 1.96 / sqrt(N)).
lag_acf = acf(ts_diff_logtrans, nlags = 30)
lag_pacf = pacf(ts_diff_logtrans, nlags = 50, method = 'ols')
plt.figure(figsize = (28,5))
plt.subplot(121)
plt.plot(lag_acf)
plt.axhline(y = 0, linestyle = '--', color = 'gray')
plt.axhline(y = -1.96/np.sqrt(len(ts_diff_logtrans)), linestyle = '--', color = 'gray')
plt.axhline(y = 1.96/np.sqrt(len(ts_diff_logtrans)), linestyle = '--', color = 'gray')
plt.title('Autocorrelation Function', fontsize = 20)
Text(0.5, 1.0, 'Autocorrelation Function')
# PACF with the same confidence bounds — used to pick the AR order (p),
# while the ACF above informs the MA order (q).
plt.figure(figsize = (28,5))
plt.subplot(122)
plt.plot(lag_pacf)
plt.axhline(y = 0, linestyle = '--', color = 'gray')
plt.axhline(y = -1.96/np.sqrt(len(ts_diff_logtrans)), linestyle = '--', color = 'gray')
plt.axhline(y = 1.96/np.sqrt(len(ts_diff_logtrans)), linestyle = '--', color = 'gray')
plt.title('Partial Autocorrelation Function', fontsize = 20)
plt.tight_layout()
# Same ACF/PACF diagnostics using statsmodels' built-in plotting helpers,
# which draw the confidence band automatically.
from statsmodels.graphics.tsaplots import plot_acf
from statsmodels.graphics.tsaplots import plot_pacf
from matplotlib import pyplot
plt.figure(figsize = (28,15))
pyplot.subplot(211)
plot_acf(ts_diff_logtrans, ax = pyplot.gca(), lags = 40)
plt.title('Autocorrelation', fontsize = 25)
pyplot.subplot(212)
plot_pacf(ts_diff_logtrans, ax = pyplot.gca(), lags = 50)
plt.title('Trend', fontsize = 25)
pyplot.show()
# NOTE(review): statsmodels.tsa.arima_model.ARIMA is the legacy API
# (removed in statsmodels >= 0.13); new code should use
# statsmodels.tsa.arima.model.ARIMA.
from statsmodels.tsa.arima_model import ARIMA
# AR-only fit: ARIMA(8, 1, 0) on the log series.
ts_diff_logtrans = ts_diff_logtrans.fillna(0)
model = ARIMA(ts_logtransformed, order = (8, 1, 0))
results_AR = model.fit(disp = -1)
plt.figure(figsize = (28,8))
plt.plot(ts_diff_logtrans)
plt.plot(results_AR.fittedvalues, color = 'red', label = 'order 8')
# NOTE(review): fittedvalues come from a d=1 (one-day) difference while
# ts_diff_logtrans is a 7-day seasonal difference — this RSS compares two
# differently-differenced series; confirm that is intended.
RSS = results_AR.fittedvalues-ts_diff_logtrans
RSS.dropna(inplace = True)
plt.title('RSS: %.4f'% sum(RSS**2), fontsize = 25)
plt.legend(loc = 'best')
<matplotlib.legend.Legend at 0x22868a2e248>
# Lower-order AR fit for comparison: ARIMA(2, 1, 0).
model = ARIMA(ts_logtransformed, order = (2, 1, 0))
results_AR = model.fit(disp = -1)
plt.figure(figsize = (28,8))
plt.plot(ts_diff_logtrans)
plt.plot(results_AR.fittedvalues, color = 'red', label = 'order 2')
RSS = results_AR.fittedvalues - ts_diff_logtrans
RSS.dropna(inplace = True)
plt.title('RSS: %.4f'% sum(RSS**2), fontsize = 25)
plt.legend(loc ='best')
<matplotlib.legend.Legend at 0x2286883ccc8>
print(results_AR.summary())
ARIMA Model Results
==============================================================================
Dep. Variable: D.Close No. Observations: 1555
Model: ARIMA(2, 1, 0) Log Likelihood 2704.690
Method: css-mle S.D. of innovations 0.042
Date: Thu, 24 Jun 2021 AIC -5401.380
Time: 21:09:31 BIC -5379.984
Sample: 04-29-2013 HQIC -5393.424
- 07-31-2017
=================================================================================
coef std err z P>|z| [0.025 0.975]
---------------------------------------------------------------------------------
const 0.0020 0.001 1.908 0.056 -5.37e-05 0.004
ar.L1.D.Close -0.0011 0.025 -0.044 0.965 -0.051 0.049
ar.L2.D.Close -0.0435 0.025 -1.715 0.086 -0.093 0.006
Roots
=============================================================================
Real Imaginary Modulus Frequency
-----------------------------------------------------------------------------
AR.1 -0.0127 -4.7948j 4.7948 -0.2504
AR.2 -0.0127 +4.7948j 4.7948 0.2504
-----------------------------------------------------------------------------
# MA-only fit: ARIMA(0, 1, 18) — 18 moving-average terms on the
# first-differenced log series.
model = ARIMA(ts_logtransformed, order = (0, 1,18))
results_MA = model.fit(disp = -1)
plt.figure(figsize = (28,8))
plt.plot(ts_diff_logtrans)
plt.plot(results_MA.fittedvalues, color = 'red')
RSS = results_MA.fittedvalues - ts_diff_logtrans
RSS.dropna(inplace = True)
plt.title('RSS: %.4f'% sum(RSS**2), fontsize = 25)
Text(0.5, 1.0, 'RSS: 19.0817')
print(results_MA.summary())
ARIMA Model Results
==============================================================================
Dep. Variable: D.Close No. Observations: 1555
Model: ARIMA(0, 1, 18) Log Likelihood 2727.323
Method: css-mle S.D. of innovations 0.042
Date: Thu, 24 Jun 2021 AIC -5414.646
Time: 21:10:32 BIC -5307.661
Sample: 04-29-2013 HQIC -5374.862
- 07-31-2017
==================================================================================
coef std err z P>|z| [0.025 0.975]
----------------------------------------------------------------------------------
const 0.0019 0.001 1.374 0.169 -0.001 0.005
ma.L1.D.Close -0.0100 0.026 -0.390 0.697 -0.060 0.040
ma.L2.D.Close -0.0373 0.025 -1.469 0.142 -0.087 0.012
ma.L3.D.Close -0.0222 0.026 -0.871 0.384 -0.072 0.028
ma.L4.D.Close 0.0759 0.026 2.962 0.003 0.026 0.126
ma.L5.D.Close 0.0271 0.026 1.039 0.299 -0.024 0.078
ma.L6.D.Close 0.0821 0.026 3.142 0.002 0.031 0.133
ma.L7.D.Close -0.0306 0.027 -1.151 0.250 -0.083 0.022
ma.L8.D.Close -0.0069 0.026 -0.262 0.793 -0.058 0.045
ma.L9.D.Close -0.0087 0.027 -0.326 0.745 -0.061 0.043
ma.L10.D.Close 0.0441 0.026 1.685 0.092 -0.007 0.095
ma.L11.D.Close 0.0772 0.025 3.047 0.002 0.028 0.127
ma.L12.D.Close -0.0106 0.028 -0.381 0.703 -0.065 0.044
ma.L13.D.Close 0.0372 0.025 1.480 0.139 -0.012 0.086
ma.L14.D.Close 0.0325 0.028 1.147 0.251 -0.023 0.088
ma.L15.D.Close 0.0299 0.029 1.030 0.303 -0.027 0.087
ma.L16.D.Close 0.0025 0.027 0.091 0.927 -0.051 0.056
ma.L17.D.Close 0.0832 0.027 3.048 0.002 0.030 0.137
ma.L18.D.Close -0.0335 0.028 -1.211 0.226 -0.088 0.021
Roots
==============================================================================
Real Imaginary Modulus Frequency
------------------------------------------------------------------------------
MA.1 -1.1293 -0.0000j 1.1293 -0.5000
MA.2 -1.0360 -0.4363j 1.1241 -0.4366
MA.3 -1.0360 +0.4363j 1.1241 0.4366
MA.4 -0.8723 -0.7427j 1.1457 -0.3877
MA.5 -0.8723 +0.7427j 1.1457 0.3877
MA.6 -0.5234 -0.9971j 1.1261 -0.3269
MA.7 -0.5234 +0.9971j 1.1261 0.3269
MA.8 -0.1031 -1.1561j 1.1607 -0.2642
MA.9 -0.1031 +1.1561j 1.1607 0.2642
MA.10 0.2555 -1.1746j 1.2020 -0.2159
MA.11 0.2555 +1.1746j 1.2020 0.2159
MA.12 0.6969 -0.9478j 1.1764 -0.1491
MA.13 0.6969 +0.9478j 1.1764 0.1491
MA.14 1.0915 -0.2689j 1.1242 -0.0384
MA.15 1.0915 +0.2689j 1.1242 0.0384
MA.16 0.9436 -0.6872j 1.1673 -0.1002
MA.17 0.9436 +0.6872j 1.1673 0.1002
MA.18 2.7060 -0.0000j 2.7060 -0.0000
------------------------------------------------------------------------------
# Overlay the MA model residuals on the log-transformed series.
plt.figure(figsize = (28,8))
plt.plot(ts_logtransformed, label = 'log_tranfromed_data')
plt.plot(results_MA.resid, color ='green', label= 'Residuals')
plt.title('MA Model Residual plot', fontsize = 25)
plt.legend(loc = 'best')
<matplotlib.legend.Legend at 0x22868927dc8>
# Density of the MA residuals — a zero-centred, roughly normal shape
# suggests the model has captured most of the structure.
plt.figure(figsize = (28,8))
results_MA.resid.plot(kind = 'kde')
plt.title('Density plot of the residual error values', fontsize = 25)
print(results_MA.resid.describe())
count 1555.000000 mean 0.000053 std 0.041911 min -0.246817 25% -0.013552 50% 0.000411 75% 0.015719 max 0.332676 dtype: float64
# Combined model: ARIMA(8, 1, 18), fitted without a constant (trend = 'nc').
model = ARIMA(ts_logtransformed, order = (8, 1, 18))
results_ARIMA = model.fit(trend = 'nc', disp = -1)
plt.figure(figsize = (28,8))
plt.plot(ts_diff_logtrans)
plt.plot(results_ARIMA.fittedvalues, color = 'red', label = 'p = 8, q = 18')
RSS = results_ARIMA.fittedvalues - ts_diff_logtrans
RSS.dropna(inplace = True)
plt.title('RSS: %.4f'% sum(RSS**2), fontsize = 25)
plt.legend(loc ='best')
<matplotlib.legend.Legend at 0x22868a39b08>
# Larger combined model: ARIMA(20, 1, 18).
model = ARIMA(ts_logtransformed, order = (20, 1, 18))
results_ARIMA = model.fit(disp = -1)
plt.figure(figsize = (28,8))
plt.plot(ts_diff_logtrans)
# Label now reflects the actual fitted order (the original legend said
# 'order 15', which matched neither this model nor the previous one).
plt.plot(results_ARIMA.fittedvalues, color = 'red', label = 'order (20, 1, 18)')
RSS = results_ARIMA.fittedvalues - ts_diff_logtrans
RSS.dropna(inplace = True)
plt.title('RSS: %.4f'% sum(RSS**2), fontsize = 25)
plt.legend(loc = 'best')
<matplotlib.legend.Legend at 0x228687ede48>
import warnings
def evaluate_arima_model(data_set, arima_order):
    """Fit ARIMA(arima_order) on data_set and return a scalar RSS.

    The residual sum of squares is taken between the model's fitted
    values and the (module-level) weekly-differenced log series
    ts_diff_logtrans, with unaligned/NaN entries dropped.
    """
    model = ARIMA(data_set, order = arima_order)
    results_ARIMA = model.fit(disp = -1)
    RSS_diff = results_ARIMA.fittedvalues - ts_diff_logtrans
    # Reduce to a float so candidate orders can be compared. (The original
    # returned the whole squared-error Series; comparing that against a
    # float raised inside the grid search, every order was silently
    # skipped by the bare except, and the "best" config was always None.)
    return float((RSS_diff ** 2).dropna().sum())
def evaluate_models(dataset, p_values, d_values):
    """Grid-search (p, d) with q fixed at 18 and print the best RSS."""
    best_score, best_cfg = float("inf"), None
    for p in p_values:
        for d in d_values:
            order = (p,d,18)
            try:
                rss = evaluate_arima_model(dataset, order)
            except Exception:
                # Some orders fail to converge or are non-invertible —
                # skip those candidates rather than aborting the search.
                continue
            if rss < best_score:
                best_score, best_cfg = rss, order
            print('ARIMA%s RSS = %.3f' % (order,rss))
    print('Best ARIMA%s RSS = %.3f' % (best_cfg, best_score))
p_values = range(8,20,3)
d_values = range(0,3)
warnings.filterwarnings('ignore')
evaluate_models(ts_logtransformed, p_values, d_values)
print(results_ARIMA.summary())
ARIMA Model Results
==============================================================================
Dep. Variable: D.Close No. Observations: 1555
Model: ARIMA(20, 1, 18) Log Likelihood 2760.928
Method: css-mle S.D. of innovations 0.041
Date: Thu, 24 Jun 2021 AIC -5441.855
Time: 21:43:42 BIC -5227.886
Sample: 04-29-2013 HQIC -5362.287
- 07-31-2017
==================================================================================
coef std err z P>|z| [0.025 0.975]
----------------------------------------------------------------------------------
const 0.0019 0.001 1.699 0.089 -0.000 0.004
ar.L1.D.Close -0.4467 nan nan nan nan nan
ar.L2.D.Close -0.1229 nan nan nan nan nan
ar.L3.D.Close -0.2390 0.454 -0.527 0.598 -1.129 0.651
ar.L4.D.Close 0.1178 0.205 0.576 0.565 -0.283 0.519
ar.L5.D.Close -0.0715 nan nan nan nan nan
ar.L6.D.Close 0.0043 0.106 0.041 0.968 -0.204 0.213
ar.L7.D.Close -0.0015 0.250 -0.006 0.995 -0.492 0.489
ar.L8.D.Close -0.2024 0.005 -42.019 0.000 -0.212 -0.193
ar.L9.D.Close 0.3719 nan nan nan nan nan
ar.L10.D.Close -0.1473 0.162 -0.910 0.363 -0.465 0.170
ar.L11.D.Close 0.2946 0.121 2.431 0.015 0.057 0.532
ar.L12.D.Close 0.0954 nan nan nan nan nan
ar.L13.D.Close -0.4890 0.107 -4.587 0.000 -0.698 -0.280
ar.L14.D.Close -0.3887 0.340 -1.145 0.252 -1.054 0.277
ar.L15.D.Close -0.4496 nan nan nan nan nan
ar.L16.D.Close -0.2003 nan nan nan nan nan
ar.L17.D.Close -0.0201 0.295 -0.068 0.946 -0.599 0.559
ar.L18.D.Close -0.4198 0.143 -2.935 0.003 -0.700 -0.139
ar.L19.D.Close -0.0176 nan nan nan nan nan
ar.L20.D.Close 0.0123 0.043 0.283 0.777 -0.073 0.098
ma.L1.D.Close 0.4417 nan nan nan nan nan
ma.L2.D.Close 0.0816 nan nan nan nan nan
ma.L3.D.Close 0.2140 0.463 0.462 0.644 -0.694 1.122
ma.L4.D.Close -0.0704 0.171 -0.413 0.680 -0.405 0.264
ma.L5.D.Close 0.1129 nan nan nan nan nan
ma.L6.D.Close 0.0843 0.150 0.563 0.574 -0.209 0.378
ma.L7.D.Close 0.0270 0.230 0.117 0.907 -0.424 0.478
ma.L8.D.Close 0.1710 nan nan nan nan nan
ma.L9.D.Close -0.4107 nan nan nan nan nan
ma.L10.D.Close 0.1578 0.143 1.107 0.268 -0.122 0.437
ma.L11.D.Close -0.2143 0.141 -1.524 0.128 -0.490 0.061
ma.L12.D.Close -0.0797 0.037 -2.149 0.032 -0.152 -0.007
ma.L13.D.Close 0.5303 0.125 4.241 0.000 0.285 0.775
ma.L14.D.Close 0.4385 0.308 1.425 0.154 -0.164 1.041
ma.L15.D.Close 0.4079 nan nan nan nan nan
ma.L16.D.Close 0.2013 0.188 1.071 0.284 -0.167 0.570
ma.L17.D.Close 0.0618 0.291 0.212 0.832 -0.509 0.633
ma.L18.D.Close 0.4806 0.065 7.386 0.000 0.353 0.608
Roots
==============================================================================
Real Imaginary Modulus Frequency
------------------------------------------------------------------------------
AR.1 -0.9836 -0.2321j 1.0106 -0.4631
AR.2 -0.9836 +0.2321j 1.0106 0.4631
AR.3 -0.9725 -0.3834j 1.0453 -0.4402
AR.4 -0.9725 +0.3834j 1.0453 0.4402
AR.5 -0.6005 -0.8028j 1.0026 -0.3522
AR.6 -0.6005 +0.8028j 1.0026 0.3522
AR.7 -0.3902 -1.0177j 1.0899 -0.3083
AR.8 -0.3902 +1.0177j 1.0899 0.3083
AR.9 -0.0175 -1.0112j 1.0114 -0.2528
AR.10 -0.0175 +1.0112j 1.0114 0.2528
AR.11 0.3639 -0.9429j 1.0107 -0.1914
AR.12 0.3639 +0.9429j 1.0107 0.1914
AR.13 0.9846 -0.2056j 1.0059 -0.0328
AR.14 0.9846 +0.2056j 1.0059 0.0328
AR.15 0.8026 -0.6127j 1.0098 -0.1038
AR.16 0.8026 +0.6127j 1.0098 0.1038
AR.17 0.7839 -1.0131j 1.2810 -0.1452
AR.18 0.7839 +1.0131j 1.2810 0.1452
AR.19 -5.1725 -0.0000j 5.1725 -0.5000
AR.20 6.6624 -0.0000j 6.6624 -0.0000
MA.1 -0.9804 -0.2237j 1.0056 -0.4643
MA.2 -0.9804 +0.2237j 1.0056 0.4643
MA.3 -0.9472 -0.3906j 1.0245 -0.4377
MA.4 -0.9472 +0.3906j 1.0245 0.4377
MA.5 -0.6046 -0.8058j 1.0074 -0.3524
MA.6 -0.6046 +0.8058j 1.0074 0.3524
MA.7 -0.4202 -1.0100j 1.0939 -0.3127
MA.8 -0.4202 +1.0100j 1.0939 0.3127
MA.9 -0.0220 -1.0009j 1.0012 -0.2535
MA.10 -0.0220 +1.0009j 1.0012 0.2535
MA.11 0.3583 -0.9570j 1.0219 -0.1930
MA.12 0.3583 +0.9570j 1.0219 0.1930
MA.13 0.9777 -0.2106j 1.0002 -0.0338
MA.14 0.9777 +0.2106j 1.0002 0.0338
MA.15 0.7949 -0.6098j 1.0019 -0.1041
MA.16 0.7949 +0.6098j 1.0019 0.1041
MA.17 0.7791 -0.9637j 1.2392 -0.1418
MA.18 0.7791 +0.9637j 1.2392 0.1418
------------------------------------------------------------------------------
plt.figure(figsize = (28,8))
plt.plot(ts_logtransformed, label = 'log_tranfromed_data')
plt.plot(results_ARIMA.resid, color = 'green', label = 'Residuals')
plt.title('ARIMA Model Residual plot', fontsize = 25)
plt.legend(loc = 'best')
<matplotlib.legend.Legend at 0x22865f4ec08>
plt.figure(figsize = (28,8))
results_ARIMA.resid.plot(kind = 'kde')
plt.title('Density plot of the residual error values', fontsize = 25)
print(results_ARIMA.resid.describe())
count 1555.000000 mean 0.000059 std 0.041069 min -0.249158 25% -0.014723 50% -0.000411 75% 0.017250 max 0.315323 dtype: float64
# Load the 7-day hold-out test set (2017-08-01 .. 2017-08-07), keep only
# the closing price, and sort chronologically.
test = pd.read_csv("bitcoin_price_1week_Test - Test.csv", index_col = 'Date')
test.index = pd.to_datetime(test.index)
test = test['Close']
test = test.sort_index()
test
Date 2017-08-01 2718.26 2017-08-02 2710.67 2017-08-03 2804.73 2017-08-04 2895.89 2017-08-05 3252.91 2017-08-06 3213.94 2017-08-07 3378.94 Name: Close, dtype: float64
# Fitted values of the ARIMA model are one-step-ahead predictions of the
# daily differences of the log series.
predictions_ARIMA_diff = pd.Series(results_ARIMA.fittedvalues, copy = True)
print(predictions_ARIMA_diff.head())
Date 2013-04-29 0.001927 2013-04-30 0.001943 2013-05-01 -0.001062 2013-05-02 0.002360 2013-05-03 0.013765 dtype: float64
# Cumulative sum converts predicted differences into log-level offsets
# from the first observation.
predictions_ARIMA_diff_cumsum = predictions_ARIMA_diff.cumsum()
print(predictions_ARIMA_diff_cumsum.head())
Date 2013-04-29 0.001927 2013-04-30 0.003870 2013-05-01 0.002808 2013-05-02 0.005167 2013-05-03 0.018933 dtype: float64
# Rebuild the log-price path: start every date at the first observed log
# price, then add the cumulative predicted differences.
predictions_ARIMA_log = pd.Series(ts_logtransformed.iloc[0], index = ts_logtransformed.index)
predictions_ARIMA_log = predictions_ARIMA_log.add(predictions_ARIMA_diff_cumsum, fill_value = 0)
predictions_ARIMA_log.head()
Date 2013-04-28 4.899406 2013-04-29 4.901333 2013-04-30 4.903276 2013-05-01 4.902213 2013-05-02 4.904573 dtype: float64
# Invert the log transform back to price scale and compare the in-sample
# reconstruction with the actual closes (RMSE over all 1556 days).
predictions_ARIMA = np.exp(predictions_ARIMA_log)
plt.figure(figsize = (28,8))
plt.plot(Data['Close'])
plt.plot(predictions_ARIMA)
plt.title('RMSE: %.4f'% np.sqrt(sum((predictions_ARIMA - Data['Close'])**2)/len(Data['Close'])), fontsize = 25)
Text(0.5, 1.0, 'RMSE: 461.7778')
# Forecast the 7 test days with the fitted ARIMA; forecast(...)[0] is the
# point-forecast array (legacy API), indexed here by the test dates and
# exponentiated back to price scale.
dates = [pd.Timestamp('2017-08-01'), pd.Timestamp('2017-08-02'), pd.Timestamp('2017-08-03'),pd.Timestamp('2017-08-04'),
        pd.Timestamp('2017-08-05'), pd.Timestamp('2017-08-06'), pd.Timestamp('2017-08-07')]
forecast = pd.Series(results_ARIMA.forecast(steps = 7)[0],dates)
forecast = np.exp(forecast)
print(forecast)
error = mean_squared_error(test, forecast)
print('Test MSE: %.3f' % error)
2017-08-01 2892.847097 2017-08-02 2970.767299 2017-08-03 3001.740586 2017-08-04 3051.358362 2017-08-05 2982.134914 2017-08-06 3060.281868 2017-08-07 2854.266421 dtype: float64 Test MSE: 76189.597
# Plot forecast vs. observed test prices.
plt.figure(figsize = (28,8))
plt.plot(forecast, color = 'green', label = 'Predicted rates')
plt.plot(test, color = 'red', label = 'Observed from test data')
# RMSE over the 7 forecast points: divide by len(test). (The original
# divided the 7 squared errors by len(Data) = 1556, vastly understating
# the error and disagreeing with the printed Test MSE above.)
plt.title('RMSE: %.4f'% np.sqrt(sum((forecast - test)**2)/len(test)), fontsize = 25)
plt.legend(loc = 'best')
<matplotlib.legend.Legend at 0x2286415a9c8>
# Same reconstruction pipeline for the MA-only model: start from its
# fitted daily log differences.
predictions_MA_diff = pd.Series(results_MA.fittedvalues, copy = True)
print(predictions_MA_diff.head())
Date 2013-04-29 0.001941 2013-04-30 0.001458 2013-05-01 0.000081 2013-05-02 0.002704 2013-05-03 0.013811 dtype: float64
# Cumulative sum of predicted differences -> log-level offsets.
predictions_MA_diff_cumsum = predictions_MA_diff.cumsum()
print(predictions_MA_diff_cumsum.head())
Date 2013-04-29 0.001941 2013-04-30 0.003399 2013-05-01 0.003479 2013-05-02 0.006184 2013-05-03 0.019995 dtype: float64
# Anchor at the first observed log price and add the cumulative offsets.
predictions_MA_log = pd.Series(ts_logtransformed.iloc[0], index = ts_logtransformed.index)
predictions_MA_log = predictions_MA_log.add(predictions_MA_diff_cumsum, fill_value = 0)
predictions_MA_log.head()
Date 2013-04-28 4.899406 2013-04-29 4.901347 2013-04-30 4.902805 2013-05-01 4.902885 2013-05-02 4.905590 dtype: float64
# Back to price scale; in-sample RMSE of the MA reconstruction.
predictions_MA = np.exp(predictions_MA_log)
plt.figure(figsize = (28,8))
plt.plot(Data['Close'])
plt.plot(predictions_MA)
plt.title('RMSE: %.4f'% np.sqrt(sum((predictions_MA - Data['Close'])**2)/len(Data['Close'])), fontsize = 25)
Text(0.5, 1.0, 'RMSE: 362.5189')
# 7-day forecast from the MA model, indexed by the test dates and
# exponentiated back to price scale.
dates = [pd.Timestamp('2017-08-01'), pd.Timestamp('2017-08-02'), pd.Timestamp('2017-08-03'),pd.Timestamp('2017-08-04'),
        pd.Timestamp('2017-08-05'), pd.Timestamp('2017-08-06'), pd.Timestamp('2017-08-07')]
forecast = pd.Series(results_MA.forecast(steps = 7)[0],dates)
forecast = np.exp(forecast)
print(forecast)
error = mean_squared_error(test, forecast)
print('Test MSE: %.3f' % error)
2017-08-01 2873.760820 2017-08-02 2933.263249 2017-08-03 2985.844075 2017-08-04 3001.085298 2017-08-05 2982.756171 2017-08-06 3057.337952 2017-08-07 3063.277791 dtype: float64 Test MSE: 44963.792
# Plot MA forecast vs. observed test prices.
plt.figure(figsize = (28,8))
plt.plot(forecast, color = 'green', label ='Predicted rates')
plt.plot(test, color = 'red', label = 'Observed from test data')
# RMSE over the 7 forecast points: divide by len(test). (The original
# divided by len(Data['Close']) = 1556 instead of 7, understating the
# error and disagreeing with the printed Test MSE above.)
plt.title('RMSE: %.4f'% np.sqrt(sum((forecast - test)**2)/len(test)), fontsize = 25)
plt.legend(loc = 'best')
<matplotlib.legend.Legend at 0x228643158c8>
# Monthly mean closing price (resampled to month-end) for reference.
monthly_mean = Data['Close'].resample('M').mean()
monthly_mean
print(monthly_mean.head(13))
plt.figure(figsize = (25,8))
monthly_mean.plot()
Date 2013-04-30 139.250000 2013-05-31 119.993226 2013-06-30 107.761333 2013-07-31 90.512258 2013-08-31 113.905161 2013-09-30 130.061667 2013-10-31 158.311935 2013-11-30 550.420667 2013-12-31 800.780968 2014-01-31 844.168387 2014-02-28 661.618214 2014-03-31 592.200323 2014-04-30 461.362000 Freq: M, Name: Close, dtype: float64
<AxesSubplot:xlabel='Date'>
test_logtransformed = np.log(test)
# Walk-forward validation: refit the MA model on all data seen so far,
# forecast one step ahead, then append the actual observation to the
# history before the next step. (The original called forecast() on the
# same already-fitted results_MA every iteration and never used the
# growing history, so it just repeated the identical one-step forecast
# seven times — visible in the output, where every prediction was
# 7.963377.)
history = [x for x in ts_logtransformed]
predictions = list()
for t in range(len(test)):
    model = ARIMA(history, order = (0, 1, 18))
    model_fit = model.fit(disp = -1)
    output = model_fit.forecast()
    yhat = output[0]
    predictions.append(yhat)
    obs = test_logtransformed[t]
    history.append(obs)
    print('predicted = %f, expected = %f' % (yhat, obs))
error = mean_squared_error(test_logtransformed, predictions)
print('Test MSE: %.3f' % error)
predicted = 7.963377, expected = 7.907747 predicted = 7.963377, expected = 7.904951 predicted = 7.963377, expected = 7.939063 predicted = 7.963377, expected = 7.971048 predicted = 7.963377, expected = 8.087305 predicted = 7.963377, expected = 8.075253 predicted = 7.963377, expected = 8.125317 Test MSE: 0.009
from fbprophet import Prophet
Data['Close'].head()
Date 2013-04-28 134.21 2013-04-29 144.54 2013-04-30 139.00 2013-05-01 116.99 2013-05-02 105.21 Name: Close, dtype: float64
# Prophet expects a two-column DataFrame: 'ds' (datestamp) and 'y' (value).
data_prophet = Data['Close'].copy()
data_prophet = pd.DataFrame(data_prophet)
data_prophet.reset_index(drop = False, inplace = True)
data_prophet.columns = ['ds','y']
data_prophet
| ds | y | |
|---|---|---|
| 0 | 2013-04-28 | 134.21 |
| 1 | 2013-04-29 | 144.54 |
| 2 | 2013-04-30 | 139.00 |
| 3 | 2013-05-01 | 116.99 |
| 4 | 2013-05-02 | 105.21 |
| ... | ... | ... |
| 1551 | 2017-07-27 | 2671.78 |
| 1552 | 2017-07-28 | 2809.01 |
| 1553 | 2017-07-29 | 2726.45 |
| 1554 | 2017-07-30 | 2757.18 |
| 1555 | 2017-07-31 | 2875.34 |
1556 rows × 2 columns
# Fit Prophet with default settings and forecast 7 days past the end of
# the training window (2017-08-01 .. 2017-08-07).
m = Prophet()
m.fit(data_prophet)
future = m.make_future_dataframe(periods = 7, freq = 'D')
forecast = m.predict(future)
m.plot(forecast)
Data['Close'].plot()
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
<AxesSubplot:xlabel='Date', ylabel='y'>
m.plot_components(forecast)
forecast.columns
Index(['ds', 'trend', 'yhat_lower', 'yhat_upper', 'trend_lower', 'trend_upper',
'additive_terms', 'additive_terms_lower', 'additive_terms_upper',
'weekly', 'weekly_lower', 'weekly_upper', 'yearly', 'yearly_lower',
'yearly_upper', 'multiplicative_terms', 'multiplicative_terms_lower',
'multiplicative_terms_upper', 'yhat'],
dtype='object')
# Last 10 rows of the forecast: 3 in-sample dates (07-29 .. 07-31)
# followed by the 7 out-of-sample forecast days.
forecasted_values = forecast[['ds', 'yhat']].tail(10)
forecasted_values
| ds | yhat | |
|---|---|---|
| 1553 | 2017-07-29 | 2575.791433 |
| 1554 | 2017-07-30 | 2582.001515 |
| 1555 | 2017-07-31 | 2592.370954 |
| 1556 | 2017-08-01 | 2598.950558 |
| 1557 | 2017-08-02 | 2601.592098 |
| 1558 | 2017-08-03 | 2608.201670 |
| 1559 | 2017-08-04 | 2608.768867 |
| 1560 | 2017-08-05 | 2611.157156 |
| 1561 | 2017-08-06 | 2613.621105 |
| 1562 | 2017-08-07 | 2620.352805 |
# Index by date and rename the prediction column for comparison with the
# test series.
forecasted_values = forecasted_values.set_index('ds')
forecasted_values.columns = ['y']
forecasted_values
| y | |
|---|---|
| ds | |
| 2017-07-29 | 2575.791433 |
| 2017-07-30 | 2582.001515 |
| 2017-07-31 | 2592.370954 |
| 2017-08-01 | 2598.950558 |
| 2017-08-02 | 2601.592098 |
| 2017-08-03 | 2608.201670 |
| 2017-08-04 | 2608.768867 |
| 2017-08-05 | 2611.157156 |
| 2017-08-06 | 2613.621105 |
| 2017-08-07 | 2620.352805 |
forecasted_values.plot(figsize = (15, 5))
plt.show()
# Score only the 7 forecast dates (2017-08-01 .. 2017-08-07). The original
# took [:7], which covers 07-29 .. 08-04 — three in-sample dates — and
# compared them positionally against the 7-day test set, misaligning every
# date; the last 7 rows are the ones that match the test period.
mse = mean_squared_error(test, forecasted_values['y'][-7:])
print("The Mean Squared Error of our forecasts is", mse)
print("The Root Mean Squared Error of our forecasts is", np.sqrt(mse))
The Mean Squared Error of our forecasts is 222059.967905185 The Root Mean Squared Error of our forecasts is 471.23239267391733